knitr::opts_chunk$set(echo=TRUE,eval=TRUE, message=FALSE, warning=FALSE)
Load packages:
library(readxl)
library(foreign)
library(haven)
library(magrittr)
library(dplyr)
library(ggplot2)
library(plotly)
library(ggmap)
library(ggthemes)
library(ggpubr)
library(tidyverse)
library(rgdal)
library(osmdata)
library(nominatim)
library(jsonlite)
library(RColorBrewer)
library(DT)
library(tidyr)
library(leaflet)
library(formattable)
library(tigris)
library(htmlwidgets)
library(sf)
library(gganimate)
library(webshot)
library(maptools)
library(rgeos)
library(rgdal)
We want to explore the relationship of in person voting VS mail-in voting and Covid-19 case development.
Firstly, we argue that the overall pandemic situation before the voting period starts might lead to people’s preference of mail-in voting. This could be somewhat proved by the fact that people who are absence of in person voting could register for mail ballot and fill in “Covid” as the excuse.
To understand this, a graph of the change in mail-in voting preference from 2016 election to 2020 election could be drafted. Specifically, the background knowledge suggests that Trump administration and its supporters oppose mail-in voting, so we’ll also be looking at the difference of mail-in voting preference change from 2016 election to 2020 election for Democrats and Republicans respectively.
Read in information on the percentage of mail-in voting as well as the proportion of people of different parties within each state. Specifically, data chosen from SPAE is top-line statistics from nationwide study, by state, weighted data.
Data Source: Stewart, Charles, 2021, “2020 Survey of the Performance of American Elections”, https://doi.org/10.7910/DVN/FSGX7Z, Harvard Dataverse, V1, UNF:6:70KW4uouuTDT860MiPJq3A== [fileUNF]
vote<-read.csv("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/weightedvote.csv")
pid3<-read.csv("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/pid3.csv")
bystate<-inner_join(vote, pid3, by='id')
bystate<-rename(bystate, c("state"="id"))
bystate<-rename(bystate, c("mail"="Voted.by.mail.or.absentee.ballot.by.mail..including.dropping.off.a.ballot.that.was.mailed.to.you."))
bystate<-bystate %>%
select(state, mail, Democrat, Republican) %>%
mutate(DandR=Democrat+Republican) %>%
mutate(Democratpercent=percent(Democrat/DandR))%>%
mutate(Republicanpercent=percent(Republican/DandR))
bystate$mail<-percent(bystate$mail/100)
#Prepare color information for further plotting
for (i in 1:51){
if (bystate$Democrat[i]<bystate$Republican[i]){
bystate$party[i]="red"
}else if (bystate$Democrat[i]>bystate$Republican[i]){
bystate$party[i]="blue"
}else if (bystate$Democrat[i]==bystate$Republican[i]){
bystate$party[i]="#b2aeae"
}
}
bystate
In the meantime, according to past final reports from SPAE also available from the link above, the mail-in voting percentage of each past election at country level could be summarized as:
overallmailpercentage<-data.frame(Year=c(2000, 2004, 2008, 2012, 2016, 2020), Percentage=c(percent(0.10), percent(0.13), percent(0.16), percent(0.19), percent(0.21), percent(0.46)))
overallmailpercentage
overall<-overallmailpercentage %>%
ggplot(.,aes(x=Year,y=Percentage))+
theme_bw()+
geom_line(color="lightsteelblue2")+
geom_point(aes(size=Percentage), color="coral")+
labs(x="Year of Election", y="Percentage of Mail-in Voting", title="Percentage of Mail-in Voting in Each Election")+
scale_size_continuous(guide=FALSE)+
scale_x_discrete(limits=c(2000, 2004, 2008, 2012, 2016, 2020))+
theme(plot.title=element_text(hjust = 0.5))
overall
The graphs display a general sharp increase in mail-in voting percentage from 2016 to 2020, which could well be due to the Covid-19 development.
vote2020<-read_sav("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/2020vote.sav")
vote2020<-data.frame(vote2020)
vote2020$mail<-ifelse(vote2020$Q4==3, 1, 0)
analysis2020<-vote2020 %>%
select(mail, pid3) %>%
group_by(pid3) %>%
mutate(partytotal=n()) %>%
ungroup() %>%
group_by(pid3, mail) %>%
mutate(partyvotetype=n()) %>%
mutate(percentpartyvotetype=percent(partyvotetype/partytotal)) %>%
filter(row_number()==1) %>%
select(pid3, mail, partytotal, partyvotetype, percentpartyvotetype) %>%
filter(pid3==1|pid3==2) %>%
filter(mail==1) %>%
arrange(pid3)
analysis2020$Year<-2020
analysis2020$party[analysis2020$pid3==1]<-"Democrat"
analysis2020$party[analysis2020$pid3==2]<-"Republican"
analysis2020$partycolor[analysis2020$party=="Democrat"]<-"blue"
analysis2020$partycolor[analysis2020$party=="Republican"]<-"red"
analysis2020<-data.frame(analysis2020)
analysis2020<-analysis2020 %>%
select(party, mail, partytotal, partyvotetype, percentpartyvotetype, Year, partycolor)
analysis2020
vote2016<-read.dta("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/2016vote.dta")
vote2016$mail<-ifelse(vote2016$Q4=="Voted by mail or absentee ballot my mail", 1, 0)
analysis2016<-vote2016 %>%
select(mail, pid3) %>%
group_by(pid3) %>%
mutate(partytotal=n()) %>%
ungroup() %>%
group_by(pid3, mail) %>%
mutate(partyvotetype=n()) %>%
ungroup() %>%
mutate(percentpartyvotetype=percent(partyvotetype/partytotal)) %>%
group_by(pid3, mail) %>%
filter(row_number()==1) %>%
select(pid3,mail, partytotal, partyvotetype, percentpartyvotetype) %>%
filter(pid3=="Democrat"|pid3=="Republican") %>%
filter(mail==1) %>%
arrange(pid3)
analysis2016<-rename(analysis2016, c("party"="pid3"))
analysis2016$Year<-2016
for (i in 1:2){
if (analysis2016$party[i]=="Democrat"){
analysis2016$partycolor[i]<-"blue"
}else if (analysis2016$party[i]=="Republican"){
analysis2016$partycolor[i]<-"red"
}
}
analysis2016
vote2012<-read.dta("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/2012vote.dta")
vote2012$mail<-ifelse(vote2012$q4=="Voted by mail (or absentee)", 1, 0)
analysis2012<-vote2012 %>%
select(mail, pid3) %>%
group_by(pid3) %>%
mutate(partytotal=n()) %>%
ungroup() %>%
group_by(pid3, mail) %>%
mutate(partyvotetype=n()) %>%
ungroup() %>%
mutate(percentpartyvotetype=percent(partyvotetype/partytotal)) %>%
group_by(pid3, mail) %>%
filter(row_number()==1) %>%
select(pid3,mail, partytotal, partyvotetype, percentpartyvotetype) %>%
filter(pid3=="Democrat"|pid3=="Republican") %>%
filter(mail==1) %>%
arrange(pid3)
analysis2012<-rename(analysis2012, c("party"="pid3"))
analysis2012$Year<-2012
for (i in 1:2){
if (analysis2012$party[i]=="Democrat"){
analysis2012$partycolor[i]<-"blue"
}else if (analysis2012$party[i]=="Republican"){
analysis2012$partycolor[i]<-"red"
}
}
analysis2012
vote2008<-read.dta("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/2008vote.dta")
vote2008$mail<-ifelse(vote2008$q5=="voted by mail (or absentee)", 1, 0)
analysis2008<-vote2008 %>%
select(mail, pid3) %>%
group_by(pid3) %>%
mutate(partytotal=n()) %>%
ungroup() %>%
group_by(pid3, mail) %>%
mutate(partyvotetype=n()) %>%
ungroup() %>%
mutate(percentpartyvotetype=percent(partyvotetype/partytotal)) %>%
group_by(pid3, mail) %>%
filter(row_number()==1) %>%
select(pid3,mail, partytotal, partyvotetype, percentpartyvotetype) %>%
filter(pid3=="democrat "|pid3=="republican ") %>%
filter(mail==1) %>%
arrange(pid3)
analysis2008<-rename(analysis2008, c("party"="pid3"))
analysis2008$Year<-2008
analysis2008$party<-as.character(analysis2008$party)
analysis2008$party[analysis2008$party=="democrat "]<-"Democrat"
analysis2008$party[analysis2008$party=="republican "]<-"Republican"
analysis2008$partycolor[analysis2008$party=="Democrat"]<-"blue"
analysis2008$partycolor[analysis2008$party=="Republican"]<-"red"
analysis2008
Combine dataset.
analysisbyparty<-rbind(analysis2020, analysis2016, analysis2012, analysis2008)
analysisbyparty<-rename(analysisbyparty, c("Percentage"="percentpartyvotetype"))
analysisbyparty
byparty<-analysisbyparty %>%
ggplot(.,aes(x=Year,y=Percentage, group=party))+
theme_bw()+
geom_line(aes(color=party))+
geom_point(aes(color=party))+
scale_color_manual(values=c("Democrat"="blue", "Republican"="red"))+
theme(legend.position="right")+
annotate("text", x=2018, y=0.55, label="Democratic Party", color="blue")+
annotate("text", x=2019, y=0.35, label="Republican Party", color="red")+
labs(x="Year of Election", y="Percentage of Mail-in Voting", title="Percentage of Mail-in Voting by Party")+
scale_size_continuous(guide=FALSE)+
scale_x_discrete(limits=c(2008, 2012, 2016, 2020))+
theme(plot.title=element_text(hjust = 0.5))
byparty
There is also a sharp increase from 2016 to 2020 by party. Particularly, Democrats favor the idea of mail-in voting much more than Republicans.
We’ll then explore if different voting mechanisms could have an impact on number of cases. How does the percentage of mail-in voting affects the increase of Covid-19 cases? Do democratic states and republican states differ in the increase of Covid-19 cases during the whole voting period, since they might have different policies and preferences regarding in person vs mail-in voting at state government level?
Obtain longitude and latitude data of US states.
us.states<-map_data("state")
us.states<-as_data_frame(us.states)
us.states<-rename(us.states, c("state"="region"))
us.states$subregion<-NULL
us.states$state<-str_to_title(us.states$state)
#Add state abbreviations and centers
statenames<-as_data_frame(cbind(state=state.name, state.abb=state.abb,
state.center.x=state.center$x, state.center.y=state.center$y))
statenames<-statenames %>%
mutate_each_(funs(as.numeric), vars=c("state.center.x","state.center.y"))
us.states<-left_join(us.states, statenames)
us.states
Find geoid for each us states. Possible reference link could be kjhealy/fips-codes
geoid<-read.csv("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/state_fips_master.csv")
geoid<-rename(geoid, c("state.abb"="state_abbr"))
geoid<-rename(geoid, c("GEOID"="fips"))
geoid<-geoid %>%
select(state.abb, GEOID)
#Convert GEOID into characters
geoid$GEOID<-as.character(geoid$GEOID)
#Certain fip-codes need to be pre-processed
geoid$GEOID[geoid$state.abb=="AL"]<-"01"
geoid$GEOID[geoid$state.abb=="AK"]<-"02"
geoid$GEOID[geoid$state.abb=="AZ"]<-"04"
geoid$GEOID[geoid$state.abb=="AR"]<-"05"
geoid$GEOID[geoid$state.abb=="CA"]<-"06"
geoid$GEOID[geoid$state.abb=="CO"]<-"08"
geoid$GEOID[geoid$state.abb=="CT"]<-"09"
#Add fip-codes for District of Columbia
DC<-c("DC","11")
geoid<-rbind(geoid,DC)
geoid
Combine state mail dataset with state abbreviations.
usfullabb<-us.states %>%
select(state, state.abb) %>%
group_by(state) %>%
filter(row_number()==1) %>%
ungroup()
bystatejoin<-left_join(bystate, usfullabb, by="state")
#A few states miss state abbreviations. Complete the list.
bystatejoin$state.abb[bystatejoin$state=="Alaska"]="AK"
bystatejoin$state.abb[bystatejoin$state=="District of Columbia"]="DC"
bystatejoin$state.abb[bystatejoin$state=="Hawaii"]="HI"
bystatejoin
Combine dataset for number of successful projects with geoid.
bystategeoid<-left_join(bystatejoin, geoid, by="state.abb")
bystategeoid
Obtain shape files of US states using the tigris package.
stateshape<-states(cb=TRUE)
|
| | 0%
|
|= | 1%
|
|== | 1%
|
|== | 2%
|
|=== | 2%
|
|=== | 3%
|
|==== | 3%
|
|==== | 4%
|
|===== | 4%
|
|===== | 5%
|
|====== | 5%
|
|====== | 6%
|
|======= | 6%
|
|======= | 7%
|
|======== | 7%
|
|======== | 8%
|
|========= | 8%
|
|========== | 9%
|
|=========== | 10%
|
|============ | 11%
|
|============= | 12%
|
|============== | 13%
|
|=============== | 14%
|
|================ | 14%
|
|================ | 15%
|
|================= | 15%
|
|================= | 16%
|
|================== | 16%
|
|================== | 17%
|
|==================== | 18%
|
|===================== | 19%
|
|====================== | 19%
|
|====================== | 20%
|
|======================= | 20%
|
|======================= | 21%
|
|======================== | 21%
|
|======================== | 22%
|
|========================= | 22%
|
|========================= | 23%
|
|========================== | 23%
|
|=========================== | 24%
|
|============================ | 25%
|
|============================= | 26%
|
|============================= | 27%
|
|============================== | 27%
|
|=============================== | 28%
|
|================================ | 28%
|
|================================ | 29%
|
|================================= | 29%
|
|================================= | 30%
|
|================================== | 30%
|
|================================== | 31%
|
|=================================== | 31%
|
|=================================== | 32%
|
|==================================== | 32%
|
|==================================== | 33%
|
|===================================== | 34%
|
|====================================== | 34%
|
|======================================= | 35%
|
|======================================== | 36%
|
|========================================= | 37%
|
|========================================== | 38%
|
|============================================= | 40%
|
|============================================== | 41%
|
|============================================== | 42%
|
|=============================================== | 42%
|
|=============================================== | 43%
|
|================================================ | 43%
|
|================================================= | 44%
|
|================================================== | 45%
|
|=================================================== | 46%
|
|==================================================== | 46%
|
|==================================================== | 47%
|
|===================================================== | 47%
|
|====================================================== | 48%
|
|======================================================= | 49%
|
|======================================================= | 50%
|
|======================================================== | 50%
|
|======================================================== | 51%
|
|========================================================= | 51%
|
|========================================================= | 52%
|
|========================================================== | 52%
|
|========================================================== | 53%
|
|=========================================================== | 53%
|
|============================================================ | 54%
|
|============================================================= | 55%
|
|============================================================== | 56%
|
|================================================================ | 58%
|
|================================================================= | 58%
|
|================================================================= | 59%
|
|================================================================== | 60%
|
|=================================================================== | 60%
|
|==================================================================== | 61%
|
|==================================================================== | 62%
|
|===================================================================== | 62%
|
|===================================================================== | 63%
|
|====================================================================== | 63%
|
|======================================================================= | 64%
|
|======================================================================== | 65%
|
|========================================================================= | 65%
|
|========================================================================= | 66%
|
|========================================================================== | 66%
|
|=========================================================================== | 67%
|
|============================================================================= | 69%
|
|============================================================================== | 70%
|
|=============================================================================== | 71%
|
|================================================================================ | 72%
|
|================================================================================== | 74%
|
|==================================================================================== | 75%
|
|==================================================================================== | 76%
|
|===================================================================================== | 76%
|
|===================================================================================== | 77%
|
|============================================================================================ | 83%
|
|============================================================================================== | 84%
|
|=============================================================================================== | 85%
|
|================================================================================================ | 87%
|
|=================================================================================================== | 89%
|
|==================================================================================================== | 90%
|
|====================================================================================================== | 92%
|
|=========================================================================================================== | 96%
|
|===============================================================================================================| 100%
Read in covid cases data. Data source is United States CDC
covid<-read.csv("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv")
Keep only data during the voting period. The election day is November 3, and the earliest voting time is 46 days before the election day. Reference from Early Voting Calendar. The end of our observation date is 7 days after the election day.
covid$submission_date<-as.Date(covid$submission_date, "%m/%d/%Y")
#Filter data during the voting peiord
covid<-covid %>%
arrange(submission_date) %>%
filter(submission_date>="2020-09-17" &submission_date<="2020-11-10")
#General view
covid<-covid %>%
group_by(submission_date, state) %>%
mutate(dailytotal=sum(tot_cases)) %>%
ungroup() %>%
group_by(state) %>%
mutate(statetotalduringvoting=sum(tot_cases)) %>%
ungroup() %>%
select(submission_date, state, tot_cases, dailytotal, statetotalduringvoting)
covid<-rename(covid, "state.abb"="state")
covid
To make the number of cases comparable, they need to be weighted by state population. Data Source is United States Census Bureau
population<-read.csv("/Users/annie/Documents/GitHub/Group_H_Election_Covid/Peishan_Li/nst-est2020.csv")
Keep only population estimate of 2020 and at state level.
population<-population %>%
select(NAME, POPESTIMATE2020) %>%
filter(NAME!="United States" & NAME!="Northeast Region" & NAME!="Midwest Region" & NAME!="South Region" & NAME!="West Region")
population<-rename(population, c("state"="NAME"))
population<-rename(population, c("popestimate"="POPESTIMATE2020"))
population
Combine covid dataset with population dataset and calculate the percentages of cases within the state populations.
population<-left_join(population, usfullabb, by="state")
population$state.abb[population$state=="Alaska"]="AK"
population$state.abb[population$state=="District of Columbia"]="DC"
population$state.abb[population$state=="Hawaii"]="HI"
population
covidw<-left_join(population, covid, by="state.abb")
covidw
covidw<-covidw %>%
arrange(submission_date) %>%
mutate(dailytotalw=round(dailytotal/popestimate*1000000)) %>%
mutate(statetotalw=round(statetotalduringvoting/popestimate*1000000))
covidw
Total number in each state weighted
totalcovid<-covidw %>%
group_by(state.abb) %>%
filter(row_number()==1) %>%
ungroup() %>%
select(state, popestimate, state.abb, statetotalduringvoting, statetotalw)
totalcovid
totalcovid<-rename(totalcovid, c("STATE"="state"))
totalcovidgeoid<-left_join(totalcovid, geoid, by="state.abb")
alldata<-left_join(totalcovidgeoid, bystategeoid, by="GEOID")
alldata$state.abb.y<-NULL
alldata$state<-NULL
alldata<-rename(alldata, "state.abb"="state.abb.x")
alldata$mail<-as.numeric(alldata$mail)
alldata
Merge state map data with weighted covid cases.
allbystate=geo_join(stateshape, alldata, "STATEFP", "GEOID")
allbystate
Simple feature collection with 56 features and 22 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1489 ymin: -14.5487 xmax: 179.7785 ymax: 71.36516
Geodetic CRS: NAD83
First 10 features:
STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND AWATER
1 12 00294478 0400000US12 12 FL Florida 00 138947364717 31362872853
2 78 01802710 0400000US78 78 VI United States Virgin Islands 00 348021896 1550236199
3 30 00767982 0400000US30 30 MT Montana 00 376966832749 3869031338
4 27 00662849 0400000US27 27 MN Minnesota 00 206230065476 18942261495
5 24 01714934 0400000US24 24 MD Maryland 00 25151726296 6979340970
6 45 01779799 0400000US45 45 SC South Carolina 00 77864659170 5075874513
7 23 01779787 0400000US23 23 ME Maine 00 79887659040 11745717739
8 15 01779782 0400000US15 15 HI Hawaii 00 16634006436 11777792811
9 11 01702382 0400000US11 11 DC District of Columbia 00 158340389 18687196
10 69 01779809 0400000US69 69 MP Commonwealth of the Northern Mariana Islands 00 472292529 4644252458
STATE popestimate state.abb statetotalduringvoting statetotalw mail Democrat Republican DandR
1 Florida 21733312 FL 40741674 1874619 0.4768405 37.80221 31.526210 69.32842
2 <NA> NA <NA> NA NA NA NA NA NA
3 Montana 1080577 MT 1220613 1129594 0.8865553 28.04969 28.768770 56.81846
4 Minnesota 5657342 MN 6744329 1192137 0.5044857 43.25591 24.136840 67.39275
5 Maryland 6055802 MD 7426305 1226312 0.6022226 49.72233 18.556010 68.27834
6 South Carolina 5218040 SC 8916426 1708769 0.2667850 26.55966 34.577920 61.13758
7 Maine 1350141 ME 332357 246165 0.4672680 36.41784 27.790270 64.20811
8 Hawaii 1407006 HI 748848 532228 0.9101848 47.71746 15.341520 63.05898
9 District of Columbia 712816 DC 892451 1252008 0.7028656 76.07684 2.372092 78.44893
10 <NA> NA <NA> NA NA NA NA NA NA
Democratpercent Republicanpercent party rank geometry
1 54.53% 45.47% blue 1 MULTIPOLYGON (((-80.17628 2...
2 NA NA <NA> NA MULTIPOLYGON (((-64.62799 1...
3 49.37% 50.63% red 1 MULTIPOLYGON (((-116.0491 4...
4 64.18% 35.82% blue 1 MULTIPOLYGON (((-89.59206 4...
5 72.82% 27.18% blue 1 MULTIPOLYGON (((-76.05015 3...
6 43.44% 56.56% red 1 MULTIPOLYGON (((-79.50795 3...
7 56.72% 43.28% blue 1 MULTIPOLYGON (((-67.32259 4...
8 75.67% 24.33% blue 1 MULTIPOLYGON (((-156.0608 1...
9 96.98% 3.02% blue 1 MULTIPOLYGON (((-77.11976 3...
10 NA NA <NA> NA MULTIPOLYGON (((146.051 16....
Provide a leaflet map, where polygons are used to reflect the percentage of mail-in voting. Each state is categorized as a typical Democratic state or a Republican state based on the percentage of Democrats and Republicans from the 2020 SPAE, which is represented by the color of the state’s border on the map.
content<-paste("State:", allbystate$STATE, "<br/>",
"Total number of cases:", allbystate$statetotalduringvoting, "<br/>",
"Weighted total number of cases:", allbystate$statetotalw, "<br/>",
"Percentage of mail-in voting:", allbystate$mail, "<br/>",
"Percentage of Democrats in All Democrats and Republicans:", allbystate$Democratpercent, "<br/>",
"Percentage of Republicans in All Democrats and Republicans:", allbystate$Republicanpercent, "<br/>")
pal=colorNumeric(palette="Greens", domain=allbystate$mail)
leafletmap1<-leaflet() %>%
addTiles() %>%
addProviderTiles("Stamen.TonerLite") %>%
setView(-98.1156, 38.4204, zoom=4) %>%
addPolygons(group="Mail-in Voting Percentage", data=allbystate, fillColor=~pal(allbystate$mail), color=allbystate$party, fillOpacity=0.7, weight=2, smoothFactor=0.2, popup=content, label=~stringr::str_c(NAME, 'See pop-up for more info'), labelOptions=labelOptions(direction='auto'), highlightOptions=highlightOptions(color=allbystate$party, weight=5, bringToFront=TRUE, sendToBack=TRUE)) %>%
addLegend("bottomright", pal=pal, values=allbystate$mail, title="State Mail-in Voting Percentage", opacity=1)
leafletmap1
Provide a leaflet map, where polygons are used to reflect the percentage of covid cases increase in the voting period till 7 days after election day. Each state is categorized as a typical Democratic state or a Republican state based on the percentage of Democrats and Republicans from the 2020 SPAE, which is represented by the color of the state’s border on the map.
content<-paste("State:", allbystate$STATE, "<br/>",
"Total number of cases:", allbystate$statetotalduringvoting, "<br/>",
"Weighted total number of cases:", allbystate$statetotalw, "<br/>",
"Percentage of mail-in voting:", allbystate$mail, "<br/>",
"Percentage of Democrats in All Democrats and Republicans:", allbystate$Democratpercent, "<br/>",
"Percentage of Republicans in All Democrats and Republicans:", allbystate$Republicanpercent, "<br/>")
pal=colorNumeric(palette="Oranges", domain=allbystate$statetotalw)
leafletmap2<-leaflet() %>%
addTiles() %>%
addProviderTiles("Stamen.TonerLite") %>%
setView(-98.1156, 38.4204, zoom=4) %>%
addPolygons(data=allbystate, fillColor=~pal(allbystate$statetotalw), color=allbystate$party, fillOpacity=0.7, weight=2, smoothFactor=0.2, popup=content, label=~stringr::str_c(NAME, ' See pop-up for more info'), labelOptions=labelOptions(direction='auto'), highlightOptions=highlightOptions(color=allbystate$party, weight=5, bringToFront=TRUE, sendToBack=TRUE)) %>%
addLegend("bottomright", pal=pal, values=allbystate$statetotalw, title="State Weighted Covid Cases Increase During Voting Period", opacity=1)
leafletmap2
Run a regression to see the relationship between mail-in voting percentage and weighted covid cases and visualize the relationship.
lm<-lm(log(statetotalw)~mail, data=alldata)
summary(lm)
Call:
lm(formula = log(statetotalw) ~ mail, data = alldata)
Residuals:
Min 1Q Median 3Q Max
-1.7131 -0.1719 0.1612 0.3351 0.7315
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 14.2730 0.1571 90.855 <2e-16 ***
mail -0.6944 0.3009 -2.308 0.0253 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.5118 on 49 degrees of freedom
Multiple R-squared: 0.09802, Adjusted R-squared: 0.07961
F-statistic: 5.325 on 1 and 49 DF, p-value: 0.02529
The result suggests that on average, 1 percent increase in mail-in voting proportion leads to a 69.4 percent decrease of weighted covid cases in a state, not taking into other factors into account, and the influence is significant.
Plot the relationship grouped by Democratic and Republican states.
alldata$partyname[alldata$party=="blue"]<-"Democrat"
alldata$partyname[alldata$party=="red"]<-"Republican"
relationship<-ggplot(alldata, aes(x=mail, y=log(statetotalw), group=partyname))+
theme_bw()+
geom_point(aes(color=partyname))+
geom_smooth(aes(color=partyname),method="lm", formula=y~x, se=FALSE)+
scale_color_manual(values=c("Democrat"="blue", "Republican"="red"))+
theme(legend.position="right")+
labs(x="Percentage of mail-in voting", y="Total covid cases in a state (taken logarithm)", title="Relationship between mail-in voting and covid cases increase")+
theme(plot.title=element_text(hjust=0.5))
relationship